Data Visualization Mat no - 12200983 email - raviteja.anumula-naga@stud.th-deg.de Anumula Naga Raviteja
Loading Packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
library(forcats)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Loading required package: viridisLite
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(usmap)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:viridis':
##
## unemp
## The following object is masked from 'package:purrr':
##
## map
Exploratory Data Analysis
# Load the three disease extracts (one CSV per condition) into base data
# frames.
DengueDF <- read.csv(file = "US.38362002.csv")
MalariaDF <- read.csv(file = "US.61462000.csv")
WestNileDF <- read.csv(file = "US.430397002.csv")
Head of the datasets:
# Preview the first six Dengue records.
utils::head(DengueDF, n = 6L)
## ConditionName ConditionSNOMED PathogenName PathogenTaxonID Fatalities
## 1 Dengue 38362002 Dengue virus 12637 0
## 2 Dengue 38362002 Dengue virus 12637 0
## 3 Dengue 38362002 Dengue virus 12637 0
## 4 Dengue 38362002 Dengue virus 12637 0
## 5 Dengue 38362002 Dengue virus 12637 0
## 6 Dengue 38362002 Dengue virus 12637 0
## CountryName CountryISO Admin1Name Admin1ISO Admin2Name
## 1 UNITED STATES OF AMERICA US WISCONSIN US-WI <NA>
## 2 UNITED STATES OF AMERICA US NEW YORK US-NY <NA>
## 3 UNITED STATES OF AMERICA US DISTRICT OF COLUMBIA US-DC <NA>
## 4 UNITED STATES OF AMERICA US GEORGIA US-GA <NA>
## 5 UNITED STATES OF AMERICA US COLORADO US-CO <NA>
## 6 UNITED STATES OF AMERICA US TENNESSEE US-TN <NA>
## CityName PeriodStartDate PeriodEndDate PartOfCumulativeCountSeries AgeRange
## 1 <NA> 2017-07-09 2017-07-15 0 0-130
## 2 <NA> 2017-07-30 2017-08-05 0 0-130
## 3 <NA> 2015-02-08 2015-02-14 0 0-130
## 4 <NA> 2016-08-28 2016-09-03 0 0-130
## 5 <NA> 2016-09-04 2016-09-10 0 0-130
## 6 <NA> 2016-08-28 2016-09-03 0 0-130
## Subpopulation PlaceOfAcquisition DiagnosisCertainty
## 1 None specified NA NA
## 2 None specified NA NA
## 3 None specified NA NA
## 4 None specified NA NA
## 5 None specified NA NA
## 6 None specified NA NA
## SourceName CountValue
## 1 US Nationally Notifiable Disease Surveillance System 1
## 2 US Nationally Notifiable Disease Surveillance System 1
## 3 US Nationally Notifiable Disease Surveillance System 1
## 4 US Nationally Notifiable Disease Surveillance System 1
## 5 US Nationally Notifiable Disease Surveillance System 1
## 6 US Nationally Notifiable Disease Surveillance System 1
# Preview the first six Malaria records.
utils::head(MalariaDF, n = 6L)
## ConditionName ConditionSNOMED PathogenName PathogenTaxonID Fatalities
## 1 Malaria 61462000 Plasmodium 5820 0
## 2 Malaria 61462000 Plasmodium 5820 0
## 3 Malaria 61462000 Plasmodium 5820 0
## 4 Malaria 61462000 Plasmodium 5820 0
## 5 Malaria 61462000 Plasmodium 5820 0
## 6 Malaria 61462000 Plasmodium 5820 0
## CountryName CountryISO Admin1Name Admin1ISO Admin2Name CityName
## 1 UNITED STATES OF AMERICA US WISCONSIN US-WI <NA> <NA>
## 2 UNITED STATES OF AMERICA US WISCONSIN US-WI <NA> <NA>
## 3 UNITED STATES OF AMERICA US WISCONSIN US-WI <NA> <NA>
## 4 UNITED STATES OF AMERICA US WISCONSIN US-WI <NA> <NA>
## 5 UNITED STATES OF AMERICA US WISCONSIN US-WI <NA> <NA>
## 6 UNITED STATES OF AMERICA US WISCONSIN US-WI <NA> <NA>
## PeriodStartDate PeriodEndDate PartOfCumulativeCountSeries AgeRange
## 1 1954-08-29 1954-09-04 0 0-130
## 2 1952-01-13 1952-01-19 0 0-130
## 3 1952-01-20 1952-01-26 0 0-130
## 4 1952-01-27 1952-02-02 0 0-130
## 5 1952-02-03 1952-02-09 0 0-130
## 6 1952-02-10 1952-02-16 0 0-130
## Subpopulation PlaceOfAcquisition DiagnosisCertainty
## 1 Civilian NA NA
## 2 None specified NA NA
## 3 None specified NA NA
## 4 None specified NA NA
## 5 None specified NA NA
## 6 None specified NA NA
## SourceName CountValue
## 1 US Nationally Notifiable Disease Surveillance System 1
## 2 US Nationally Notifiable Disease Surveillance System 2
## 3 US Nationally Notifiable Disease Surveillance System 2
## 4 US Nationally Notifiable Disease Surveillance System 1
## 5 US Nationally Notifiable Disease Surveillance System 2
## 6 US Nationally Notifiable Disease Surveillance System 2
# Preview the first six West Nile virus records.
utils::head(WestNileDF, n = 6L)
## ConditionName ConditionSNOMED
## 1 Disorder of nervous system caused by West Nile virus 430397002
## 2 Disorder of nervous system caused by West Nile virus 430397002
## 3 Disorder of nervous system caused by West Nile virus 430397002
## 4 Disorder of nervous system caused by West Nile virus 430397002
## 5 Disorder of nervous system caused by West Nile virus 430397002
## 6 Disorder of nervous system caused by West Nile virus 430397002
## PathogenName PathogenTaxonID Fatalities CountryName
## 1 West Nile virus 11082 0 UNITED STATES OF AMERICA
## 2 West Nile virus 11082 0 UNITED STATES OF AMERICA
## 3 West Nile virus 11082 0 UNITED STATES OF AMERICA
## 4 West Nile virus 11082 0 UNITED STATES OF AMERICA
## 5 West Nile virus 11082 0 UNITED STATES OF AMERICA
## 6 West Nile virus 11082 0 UNITED STATES OF AMERICA
## CountryISO Admin1Name Admin1ISO Admin2Name CityName PeriodStartDate
## 1 US OHIO US-OH NA NA 2010-04-04
## 2 US OHIO US-OH NA NA 2011-09-04
## 3 US OHIO US-OH NA NA 2012-08-05
## 4 US OHIO US-OH NA NA 2013-09-22
## 5 US OHIO US-OH NA NA 2015-08-16
## 6 US MICHIGAN US-MI NA NA 2011-09-18
## PeriodEndDate PartOfCumulativeCountSeries AgeRange Subpopulation
## 1 2010-04-10 0 0-130 None specified
## 2 2011-09-10 0 0-130 None specified
## 3 2012-08-11 0 0-130 None specified
## 4 2013-09-28 0 0-130 None specified
## 5 2015-08-22 0 0-130 None specified
## 6 2011-09-24 0 0-130 None specified
## PlaceOfAcquisition DiagnosisCertainty
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## SourceName CountValue
## 1 US Nationally Notifiable Disease Surveillance System 3
## 2 US Nationally Notifiable Disease Surveillance System 1
## 3 US Nationally Notifiable Disease Surveillance System 2
## 4 US Nationally Notifiable Disease Surveillance System 1
## 5 US Nationally Notifiable Disease Surveillance System 1
## 6 US Nationally Notifiable Disease Surveillance System 1
Structure of the datasets: Since all the data come from Project Tycho, the three datasets share the same structure, so let’s inspect the structure of just one of them.
# Column names and types of the Dengue extract.
utils::str(DengueDF)
## 'data.frame': 4272 obs. of 20 variables:
## $ ConditionName : chr "Dengue" "Dengue" "Dengue" "Dengue" ...
## $ ConditionSNOMED : int 38362002 38362002 38362002 38362002 38362002 38362002 38362002 38362002 38362002 38362002 ...
## $ PathogenName : chr "Dengue virus" "Dengue virus" "Dengue virus" "Dengue virus" ...
## $ PathogenTaxonID : int 12637 12637 12637 12637 12637 12637 12637 12637 12637 12637 ...
## $ Fatalities : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CountryName : chr "UNITED STATES OF AMERICA" "UNITED STATES OF AMERICA" "UNITED STATES OF AMERICA" "UNITED STATES OF AMERICA" ...
## $ CountryISO : chr "US" "US" "US" "US" ...
## $ Admin1Name : chr "WISCONSIN" "NEW YORK" "DISTRICT OF COLUMBIA" "GEORGIA" ...
## $ Admin1ISO : chr "US-WI" "US-NY" "US-DC" "US-GA" ...
## $ Admin2Name : chr NA NA NA NA ...
## $ CityName : chr NA NA NA NA ...
## $ PeriodStartDate : chr "2017-07-09" "2017-07-30" "2015-02-08" "2016-08-28" ...
## $ PeriodEndDate : chr "2017-07-15" "2017-08-05" "2015-02-14" "2016-09-03" ...
## $ PartOfCumulativeCountSeries: int 0 0 0 0 0 0 0 0 0 0 ...
## $ AgeRange : chr "0-130" "0-130" "0-130" "0-130" ...
## $ Subpopulation : chr "None specified" "None specified" "None specified" "None specified" ...
## $ PlaceOfAcquisition : logi NA NA NA NA NA NA ...
## $ DiagnosisCertainty : logi NA NA NA NA NA NA ...
## $ SourceName : chr "US Nationally Notifiable Disease Surveillance System" "US Nationally Notifiable Disease Surveillance System" "US Nationally Notifiable Disease Surveillance System" "US Nationally Notifiable Disease Surveillance System" ...
## $ CountValue : int 1 1 1 1 1 1 1 1 1 1 ...
Importing USA flood data and population data:
# Flood events, read with readr so the result is a tibble.
FloodData <- readr::read_csv(file = "USAFloodReport.csv")
## Rows: 46 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): State, GlideNumber, Country, OtherCountry, Validation, MainCause
## dbl (7): long, lat, Area, Began, Dead, Displaced, Severity
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# State population table (one row per state, one column per year).
populationDF <- readr::read_csv(file = "PopulationUS.csv")
## Rows: 52 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): States
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column names and types of the flood table.
utils::str(FloodData)
## spec_tbl_df [46 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ State : chr [1:46] "ILLINOIS" "NORTHÂ DAKOTA" "TEXAS" "IOWA" ...
## $ GlideNumber : chr [1:46] NA NA NA NA ...
## $ Country : chr [1:46] "USA" "USA" "USA" "USA" ...
## $ OtherCountry: chr [1:46] NA "Canada" NA NA ...
## $ long : num [1:46] -88.6 -97.6 -98.9 -93.4 -91.6 ...
## $ lat : num [1:46] 38.8 48.6 28.9 41.9 37.1 ...
## $ Area : num [1:46] 493137 43181 33908 123500 170079 ...
## $ Began : num [1:46] 2013 2013 2013 2013 2013 ...
## $ Validation : chr [1:46] "News" "News" "News" "News" ...
## $ Dead : num [1:46] 5 0 3 0 5 2 10 4 5 0 ...
## $ Displaced : num [1:46] 300 0 300 400 0 100 11000 4800 0 200 ...
## $ MainCause : chr [1:46] "Heavy Rain" "Heavy Rain and Snowmelt" "Heavy Rain" "Heavy Rain" ...
## $ Severity : num [1:46] 1.5 1 2 1 1.5 1.5 2 2 1 1.5 ...
## - attr(*, "spec")=
## .. cols(
## .. State = col_character(),
## .. GlideNumber = col_character(),
## .. Country = col_character(),
## .. OtherCountry = col_character(),
## .. long = col_double(),
## .. lat = col_double(),
## .. Area = col_double(),
## .. Began = col_double(),
## .. Validation = col_character(),
## .. Dead = col_double(),
## .. Displaced = col_double(),
## .. MainCause = col_character(),
## .. Severity = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Column names and types of the population table.
utils::str(populationDF)
## spec_tbl_df [52 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ States : chr [1:52] "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ Census : num [1:52] 4779736 710231 6392017 2915918 37253956 ...
## $ Estimates Base: num [1:52] 4780125 710249 6392288 2916031 37254519 ...
## $ 2010 : num [1:52] 4785437 713910 6407172 2921964 37319502 ...
## $ 2011 : num [1:52] 4799069 722128 6472643 2940667 37638369 ...
## $ 2012 : num [1:52] 4815588 730443 6554978 2952164 37948800 ...
## $ 2013 : num [1:52] 4830081 737068 6632764 2959400 38260787 ...
## $ 2014 : num [1:52] 4841799 736283 6730413 2967392 38596972 ...
## $ 2015 : num [1:52] 4852347 737498 6829676 2978048 38918045 ...
## $ 2016 : num [1:52] 4863525 741456 6941072 2989918 39167117 ...
## $ 2017 : num [1:52] 4874486 739700 7044008 3001345 39358497 ...
## $ 2018 : num [1:52] 4887681 735139 7158024 3009733 39461588 ...
## $ 2019 : num [1:52] 4903185 731545 7278717 3017804 39512223 ...
## - attr(*, "spec")=
## .. cols(
## .. States = col_character(),
## .. Census = col_number(),
## .. `Estimates Base` = col_number(),
## .. `2010` = col_number(),
## .. `2011` = col_number(),
## .. `2012` = col_number(),
## .. `2013` = col_number(),
## .. `2014` = col_number(),
## .. `2015` = col_number(),
## .. `2016` = col_number(),
## .. `2017` = col_number(),
## .. `2018` = col_number(),
## .. `2019` = col_number()
## .. )
## - attr(*, "problems")=<externalptr>
For Dengue Data set
# Reduce the Dengue extract to state / year / count and total it per
# state and year.
DengueDF <- DengueDF[c("Admin1Name", "Admin1ISO", "PeriodStartDate", "CountValue")]
# Keep only the year part (first four characters) of the period start date.
DengueDF[["PeriodStartDate"]] <- strtrim(DengueDF[["PeriodStartDate"]], 4)
# "US-WI" -> "WI"
DengueDF[["Admin1ISO"]] <- gsub("US-", "", as.character(DengueDF[["Admin1ISO"]]))
# Restrict to 2013-2017, then discard rows with any missing value.
DengueDF <- drop_na(
  DengueDF[
    DengueDF[["PeriodStartDate"]] >= "2013" &
      DengueDF[["PeriodStartDate"]] <= "2017",
  ]
)
# Sum the counts per (state name, state code, year).
DengueDF <- aggregate(
  DengueDF[["CountValue"]],
  by = list(
    StateName = DengueDF[["Admin1Name"]],
    state = DengueDF[["Admin1ISO"]],
    Year = DengueDF[["PeriodStartDate"]]
  ),
  FUN = sum
)
names(DengueDF)[4] <- "Cases"
For Malaria Data set
# Reduce the Malaria extract to state / year / count and total it per
# state and year.
MalariaDF <- MalariaDF[c("Admin1Name", "Admin1ISO", "PeriodStartDate", "CountValue")]
# Keep only the year part (first four characters) of the period start date.
MalariaDF[["PeriodStartDate"]] <- strtrim(MalariaDF[["PeriodStartDate"]], 4)
# "US-WI" -> "WI"
MalariaDF[["Admin1ISO"]] <- gsub("US-", "", as.character(MalariaDF[["Admin1ISO"]]))
# Restrict to 2013-2017, then discard rows with any missing value.
MalariaDF <- drop_na(
  MalariaDF[
    MalariaDF[["PeriodStartDate"]] >= "2013" &
      MalariaDF[["PeriodStartDate"]] <= "2017",
  ]
)
# Sum the counts per (state name, state code, year).
MalariaDF <- aggregate(
  MalariaDF[["CountValue"]],
  by = list(
    StateName = MalariaDF[["Admin1Name"]],
    state = MalariaDF[["Admin1ISO"]],
    Year = MalariaDF[["PeriodStartDate"]]
  ),
  FUN = sum
)
names(MalariaDF)[4] <- "Cases"
For West Nile Virus Data set
# Reduce the West Nile extract to state / year / count and total it per
# state and year.
WestNileDF <- WestNileDF[c("Admin1Name", "Admin1ISO", "PeriodStartDate", "CountValue")]
# Keep only the year part (first four characters) of the period start date.
WestNileDF[["PeriodStartDate"]] <- strtrim(WestNileDF[["PeriodStartDate"]], 4)
# "US-OH" -> "OH"
WestNileDF[["Admin1ISO"]] <- gsub("US-", "", as.character(WestNileDF[["Admin1ISO"]]))
# Restrict to 2013-2017, then discard rows with any missing value.
WestNileDF <- drop_na(
  WestNileDF[
    WestNileDF[["PeriodStartDate"]] >= "2013" &
      WestNileDF[["PeriodStartDate"]] <= "2017",
  ]
)
# Sum the counts per (state name, state code, year).
WestNileDF <- aggregate(
  WestNileDF[["CountValue"]],
  by = list(
    StateName = WestNileDF[["Admin1Name"]],
    state = WestNileDF[["Admin1ISO"]],
    Year = WestNileDF[["PeriodStartDate"]]
  ),
  FUN = sum
)
names(WestNileDF)[4] <- "Cases"
Removing states and territories that are excluded from the analysis
# Drop the states/territories excluded from the analysis from all three
# disease tables. The original condition spelled "DISTRICT OF COLUMBIA" as
# "DISTRICE OF COLUMBIA", so that entry was never actually removed; the
# spelling is fixed here and the list is shared so the three filters cannot
# drift apart.
excluded_states <- c(
  "WYOMING", "GUAM", "DISTRICT OF COLUMBIA", "SOUTH DAKOTA", "PUERTO RICO"
)
DengueDF <- subset(DengueDF, !(StateName %in% excluded_states))
MalariaDF <- subset(MalariaDF, !(StateName %in% excluded_states))
WestNileDF <- subset(WestNileDF, !(StateName %in% excluded_states))
For Flood Data Set
# Keep only the state and the year in which each flood began (selected by
# name rather than by column position).
FloodData <- setNames(FloodData[c("State", "Began")], c("state", "Year"))
FloodDataDF <- as.data.frame(FloodData)
# Some state names contain mis-encoded whitespace (str() shows
# "NORTHÂ DAKOTA"), which prevents them from matching the disease tables in
# later joins. Collapse every run of non-letter characters to one space.
FloodDataDF$state <- trimws(gsub("[^A-Za-z]+", " ", FloodDataDF$state, perl = TRUE))
# Year is joined against character years in the disease tables.
FloodDataDF$Year <- as.character(FloodDataDF$Year)
# Every row in this table is a recorded flood event.
FloodDataDF$IsFlooded <- "Yes"
For Population Data Set
# Keep the state name (column 1) and columns 7-11 of the population table,
# then reshape from one column per year to one row per state and year.
populationDF <- populationDF[c(1, 7:11)]
populationDF <- cbind(populationDF[1], stack(populationDF[-1]))
names(populationDF) <- c("State", "Population", "Year")
# Upper-case the state names.
populationDF[["State"]] <- toupper(populationDF[["State"]])
# Total Dengue cases per state across all retained years, drawn as a US
# choropleth. Columns are selected by name instead of position, and the
# scale argument is spelled `labels` (the original `label` only worked
# through partial argument matching).
DengueCases <- DengueDF[c("StateName", "Cases")]
DengueCases <- setNames(
  aggregate(DengueCases$Cases, by = list(DengueCases$StateName), FUN = sum),
  c("state", "Cases")
)
plot_usmap(data = DengueCases, values = "Cases", color = "#0058F5", labels = TRUE) +
  scale_fill_continuous(
    low = "white", high = "#0058F5",
    name = "Number of Cases", labels = scales::comma
  ) +
  labs(title = "Dengue Cases over the years 2013-2017") +
  theme(panel.background = element_blank())
# Total Malaria cases per state across all retained years, drawn as a US
# choropleth. Columns are selected by name instead of position, and the
# scale argument is spelled `labels` (the original `label` only worked
# through partial argument matching).
MalariaCases <- MalariaDF[c("StateName", "Cases")]
MalariaCases <- setNames(
  aggregate(MalariaCases$Cases, by = list(MalariaCases$StateName), FUN = sum),
  c("state", "Cases")
)
plot_usmap(data = MalariaCases, values = "Cases", color = "#0058F5", labels = TRUE) +
  scale_fill_continuous(
    low = "white", high = "#0058F5",
    name = "Number of Cases", labels = scales::comma
  ) +
  labs(title = "Malaria Cases over the years 2013-2017") +
  theme(panel.background = element_blank())
# Total West Nile virus cases per state across all retained years, drawn as
# a US choropleth. Columns are selected by name instead of position, and the
# scale argument is spelled `labels` (the original `label` only worked
# through partial argument matching).
WestNileCases <- WestNileDF[c("StateName", "Cases")]
WestNileCases <- setNames(
  aggregate(WestNileCases$Cases, by = list(WestNileCases$StateName), FUN = sum),
  c("state", "Cases")
)
plot_usmap(data = WestNileCases, values = "Cases", color = "#0058F5", labels = TRUE) +
  scale_fill_continuous(
    low = "white", high = "#0058F5",
    name = "Number of Cases", labels = scales::comma
  ) +
  labs(title = "West Nile Virus Cases over the years 2013-2017") +
  theme(panel.background = element_blank())
# Yearly totals per disease (summed over all states).
mal <- setNames(
  aggregate(MalariaDF$Cases, by = list(MalariaDF$Year), FUN = sum),
  c("Year", "Malaria")
)
den <- setNames(
  aggregate(DengueDF$Cases, by = list(DengueDF$Year), FUN = sum),
  c("Year", "Dengue")
)
wnv <- setNames(
  aggregate(WestNileDF$Cases, by = list(WestNileDF$Year), FUN = sum),
  c("Year", "WestNileVirus")
)
# Combine the three yearly tables side by side. full_join keeps every year
# reported for any disease; the previous chained right_join kept only the
# years present in the West Nile table and dropped the rest.
MalDen <- full_join(den, mal, by = "Year")
dftest <- full_join(MalDen, wnv, by = "Year")
# Long format: one row per (Year, Disease) with the count in `value`.
CombinedDF <- dftest %>%
  pivot_longer(c(Dengue, Malaria, WestNileVirus), names_to = "Disease")
# Distribution of yearly case totals per disease, with the individual years
# overlaid as jittered points.
ggplot(data = CombinedDF, aes(x = Disease, y = value, color = Disease)) +
  # Outliers are hidden on the boxplot because geom_jitter() already draws
  # every observation; otherwise each outlier would appear twice.
  geom_boxplot(outlier.shape = NA) +
  # The plot maps `color`, not `fill`, and "Green" is not a ColorBrewer
  # palette (it only produced an "Unknown palette" warning); the valid name
  # is "Greens".
  scale_color_brewer(palette = "Greens") +
  geom_jitter(shape = 16, position = position_jitter(0.2)) +
  labs(
    title = "Dengue VS Malaria VS West Nile Virus",
    y = "Cases", x = "Disease"
  )
## Warning in pal_name(palette, type): Unknown palette Green
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).
# Yearly case totals, one dodged bar per disease.
ggplot(CombinedDF, aes(x = Year, y = value, fill = Disease)) +
  geom_col(position = "dodge") +
  theme_ipsum() +
  scale_fill_ipsum() +
  labs(
    title = "Dengue VS Malaria VS West Nile Virus",
    x = "Year",
    y = "Cases"
  )
## Warning: Removed 2 rows containing missing values (geom_bar).
# Flag each Dengue state-year with whether a flood was recorded, then plot
# cases per state, one facet per year.
DengueDF$Year <- as.character(DengueDF$Year)
FloodVsDengueDF <- right_join(
  FloodDataDF, DengueDF,
  by = c("state" = "StateName", "Year" = "Year")
)
# Keep the needed columns by name; distinct() collapses state-years that
# matched more than one flood event.
FloodVsDengueDF <- distinct(FloodVsDengueDF[c("state", "Year", "IsFlooded", "Cases")])
# Only the flood flag is filled in. The previous blanket
# `df[is.na(df)] <- "No"` would have turned Cases into a character column if
# it ever held an NA.
FloodVsDengueDF$IsFlooded[is.na(FloodVsDengueDF$IsFlooded)] <- "No"
ggplot(data = FloodVsDengueDF, aes(x = state, y = Cases, fill = IsFlooded)) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)) +
  labs(title = "Floods impact on Dengue disease") +
  facet_wrap(~Year, nrow = 5)
# Flag each Malaria state-year with whether a flood was recorded, then plot
# cases per state, one facet per year.
MalariaDF$Year <- as.character(MalariaDF$Year)
FloodVsMalariaDF <- right_join(
  FloodDataDF, MalariaDF,
  by = c("state" = "StateName", "Year" = "Year")
)
# Keep the needed columns by name; distinct() collapses state-years that
# matched more than one flood event.
FloodVsMalariaDF <- distinct(FloodVsMalariaDF[c("state", "Year", "IsFlooded", "Cases")])
# Only the flood flag is filled in. The previous blanket
# `df[is.na(df)] <- "No"` would have turned Cases into a character column if
# it ever held an NA.
FloodVsMalariaDF$IsFlooded[is.na(FloodVsMalariaDF$IsFlooded)] <- "No"
ggplot(data = FloodVsMalariaDF, aes(x = state, y = Cases, fill = IsFlooded)) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)) +
  labs(title = "Floods impact on Malaria disease") +
  facet_wrap(~Year, nrow = 5)
# Flag each West Nile state-year with whether a flood was recorded, then
# plot cases per state, one facet per year.
WestNileDF$Year <- as.character(WestNileDF$Year)
FloodVsWestNileDF <- right_join(
  FloodDataDF, WestNileDF,
  by = c("state" = "StateName", "Year" = "Year")
)
# Keep the needed columns by name; distinct() collapses state-years that
# matched more than one flood event.
FloodVsWestNileDF <- distinct(FloodVsWestNileDF[c("state", "Year", "IsFlooded", "Cases")])
# Only the flood flag is filled in. The previous blanket
# `df[is.na(df)] <- "No"` would have turned Cases into a character column if
# it ever held an NA.
FloodVsWestNileDF$IsFlooded[is.na(FloodVsWestNileDF$IsFlooded)] <- "No"
ggplot(data = FloodVsWestNileDF, aes(x = state, y = Cases, fill = IsFlooded)) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)) +
  labs(title = "Floods impact on West Nile Virus disease") +
  facet_wrap(~Year, nrow = 5)
# Upper-cased copy of the state name. The original read `DengueDF$State`,
# which only resolved through `$` partial matching onto `StateName`; the
# column is now named explicitly.
DengueDF$State <- toupper(DengueDF$StateName)
# Attach the population for each state-year; rows without a match are
# dropped by na.omit().
PopAndDengueDF <- right_join(
  populationDF, DengueDF,
  by = c("State" = "StateName", "Year" = "Year")
) %>%
  na.omit()
# The extra `State` column of DengueDF comes back from the join as
# `State.y`; drop it by name instead of by position.
PopAndDengueDF <- PopAndDengueDF[, names(PopAndDengueDF) != "State.y", drop = FALSE]
# Bubble chart: cases per state, bubble size = population, colour = year.
PopAndDengueDF %>%
  arrange(desc(Population)) %>%
  ggplot(aes(x = State, y = Cases, size = Population, color = Year)) +
  geom_point(alpha = 0.5) +
  theme(axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)) +
  labs(title = "Population Vs Dengue") +
  scale_size(range = c(.1, 24), name = "Population")
# Attach the population for each Malaria state-year; rows without a match
# are dropped by na.omit(). The former `PopAndMalDF[,-c(6)]` step is removed:
# this join yields only five columns, so the negative index was out of range
# and did nothing.
PopAndMalDF <- right_join(
  populationDF, MalariaDF,
  by = c("State" = "StateName", "Year" = "Year")
) %>%
  na.omit()
# Bubble chart: cases per state, bubble size = population, colour = year.
PopAndMalDF %>%
  arrange(desc(Population)) %>%
  ggplot(aes(x = State, y = Cases, size = Population, color = Year)) +
  geom_point(alpha = 0.5) +
  theme(axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)) +
  labs(title = "Population Vs Malaria") +
  scale_size(range = c(.1, 24), name = "Population")
# Attach the population for each West Nile state-year; rows without a match
# are dropped by na.omit(). The former `PopAndWNVDF[,-c(6)]` step is removed:
# this join yields only five columns, so the negative index was out of range
# and did nothing.
PopAndWNVDF <- right_join(
  populationDF, WestNileDF,
  by = c("State" = "StateName", "Year" = "Year")
) %>%
  na.omit()
# Bubble chart: cases per state, bubble size = population, colour = year.
PopAndWNVDF %>%
  arrange(desc(Population)) %>%
  ggplot(aes(x = State, y = Cases, size = Population, color = Year)) +
  geom_point(alpha = 0.5) +
  theme(axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)) +
  labs(title = "Population Vs West Nile Virus") +
  # Population is plotted as a raw head count, so the legend title carries
  # no "(M)" unit.
  scale_size(range = c(.1, 24), name = "Population")